{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Count / frequency encoding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>b</td>\n",
       "      <td>30.83</td>\n",
       "      <td>0.000</td>\n",
       "      <td>u</td>\n",
       "      <td>g</td>\n",
       "      <td>w</td>\n",
       "      <td>v</td>\n",
       "      <td>1.25</td>\n",
       "      <td>t</td>\n",
       "      <td>t</td>\n",
       "      <td>1</td>\n",
       "      <td>f</td>\n",
       "      <td>g</td>\n",
       "      <td>202.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>a</td>\n",
       "      <td>58.67</td>\n",
       "      <td>4.460</td>\n",
       "      <td>u</td>\n",
       "      <td>g</td>\n",
       "      <td>q</td>\n",
       "      <td>h</td>\n",
       "      <td>3.04</td>\n",
       "      <td>t</td>\n",
       "      <td>t</td>\n",
       "      <td>6</td>\n",
       "      <td>f</td>\n",
       "      <td>g</td>\n",
       "      <td>43.0</td>\n",
       "      <td>560</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a</td>\n",
       "      <td>24.50</td>\n",
       "      <td>0.500</td>\n",
       "      <td>u</td>\n",
       "      <td>g</td>\n",
       "      <td>q</td>\n",
       "      <td>h</td>\n",
       "      <td>1.50</td>\n",
       "      <td>t</td>\n",
       "      <td>f</td>\n",
       "      <td>0</td>\n",
       "      <td>f</td>\n",
       "      <td>g</td>\n",
       "      <td>280.0</td>\n",
       "      <td>824</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>b</td>\n",
       "      <td>27.83</td>\n",
       "      <td>1.540</td>\n",
       "      <td>u</td>\n",
       "      <td>g</td>\n",
       "      <td>w</td>\n",
       "      <td>v</td>\n",
       "      <td>3.75</td>\n",
       "      <td>t</td>\n",
       "      <td>t</td>\n",
       "      <td>5</td>\n",
       "      <td>t</td>\n",
       "      <td>g</td>\n",
       "      <td>100.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>b</td>\n",
       "      <td>20.17</td>\n",
       "      <td>5.625</td>\n",
       "      <td>u</td>\n",
       "      <td>g</td>\n",
       "      <td>w</td>\n",
       "      <td>v</td>\n",
       "      <td>1.71</td>\n",
       "      <td>t</td>\n",
       "      <td>f</td>\n",
       "      <td>0</td>\n",
       "      <td>f</td>\n",
       "      <td>s</td>\n",
       "      <td>120.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  A1     A2     A3 A4 A5 A6 A7    A8 A9 A10  A11 A12 A13    A14  A15  target\n",
       "0  b  30.83  0.000  u  g  w  v  1.25  t   t    1   f   g  202.0    0       1\n",
       "1  a  58.67  4.460  u  g  q  h  3.04  t   t    6   f   g   43.0  560       1\n",
       "2  a  24.50  0.500  u  g  q  h  1.50  t   f    0   f   g  280.0  824       1\n",
       "3  b  27.83  1.540  u  g  w  v  3.75  t   t    5   t   g  100.0    3       1\n",
       "4  b  20.17  5.625  u  g  w  v  1.71  t   f    0   f   s  120.0    0       1"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's load the data set\n",
    "\n",
    "data = pd.read_csv(\"credit_approval_uci.csv\")\n",
    "\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((483, 15), (207, 15))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Let's separate into training and testing sets\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    data.drop(labels=[\"target\"], axis=1),  # predictors\n",
    "    data[\"target\"],  # target\n",
    "    test_size=0.3,  # percentage of observations in test set\n",
    "    random_state=0,  # seed to ensure reproducibility\n",
    ")\n",
    "\n",
    "X_train.shape, X_test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Count encoding with pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'v': 277,\n",
       " 'h': 101,\n",
       " 'ff': 41,\n",
       " 'bb': 39,\n",
       " 'z': 7,\n",
       " 'dd': 5,\n",
       " 'j': 5,\n",
       " 'Missing': 4,\n",
       " 'n': 3,\n",
       " 'o': 1}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's obtain the counts for each category in the variable A7\n",
    "\n",
    "counts = X_train[\"A7\"].value_counts().to_dict()\n",
    "\n",
    "counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# replace the categories with the counts\n",
    "\n",
    "X_train_enc = X_train.copy()\n",
    "X_test_enc = X_test.copy()\n",
    "\n",
    "X_train_enc[\"A7\"] = X_train_enc[\"A7\"].map(counts)\n",
    "X_test_enc[\"A7\"] = X_test_enc[\"A7\"].map(counts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "596    277\n",
       "303    277\n",
       "204    277\n",
       "351     41\n",
       "118    277\n",
       "247    101\n",
       "652    277\n",
       "513      5\n",
       "230    277\n",
       "250      7\n",
       "Name: A7, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's explore the result\n",
    "\n",
    "X_train_enc[\"A7\"].head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'c': 0.19254658385093168,\n",
       " 'q': 0.11594202898550725,\n",
       " 'w': 0.09937888198757763,\n",
       " 'i': 0.08488612836438923,\n",
       " 'ff': 0.07867494824016563,\n",
       " 'k': 0.07867494824016563,\n",
       " 'aa': 0.07039337474120083,\n",
       " 'cc': 0.062111801242236024,\n",
       " 'm': 0.053830227743271224,\n",
       " 'x': 0.049689440993788817,\n",
       " 'e': 0.043478260869565216,\n",
       " 'd': 0.043478260869565216,\n",
       " 'j': 0.016563146997929608,\n",
       " 'Missing': 0.008281573498964804,\n",
       " 'r': 0.002070393374741201}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# if instead of the count we would like the frequency:\n",
    "\n",
    "frequencies = X_train[\"A6\"].value_counts(normalize=True).to_dict()\n",
    "\n",
    "frequencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace the categories with the frequency\n",
    "\n",
    "X_train_enc[\"A6\"] = X_train_enc[\"A6\"].map(frequencies)\n",
    "X_test_enc[\"A6\"] = X_test_enc[\"A6\"].map(frequencies)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Count encoding with Feature-engine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from feature_engine.encoding import CountFrequencyEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's set up the encoder\n",
    "\n",
    "count_enc = CountFrequencyEncoder(\n",
    "    encoding_method=\"count\",  # to do frequency ==> encoding_method='frequency'\n",
    "    variables=None,  # to select all categorical variables\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-1 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-1 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-1 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-1 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-1 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-1 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-1 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>CountFrequencyEncoder()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;CountFrequencyEncoder<span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>CountFrequencyEncoder()</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "CountFrequencyEncoder()"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fit the encoder to the train set\n",
    "\n",
    "count_enc.fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13']"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the encoder finds categorical variables automatically\n",
    "\n",
    "count_enc.variables_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A1': {'b': 335, 'a': 144, 'Missing': 4},\n",
       " 'A4': {'u': 363, 'y': 115, 'Missing': 4, 'l': 1},\n",
       " 'A5': {'g': 363, 'p': 115, 'Missing': 4, 'gg': 1},\n",
       " 'A6': {'c': 93,\n",
       "  'q': 56,\n",
       "  'w': 48,\n",
       "  'i': 41,\n",
       "  'ff': 38,\n",
       "  'k': 38,\n",
       "  'aa': 34,\n",
       "  'cc': 30,\n",
       "  'm': 26,\n",
       "  'x': 24,\n",
       "  'e': 21,\n",
       "  'd': 21,\n",
       "  'j': 8,\n",
       "  'Missing': 4,\n",
       "  'r': 1},\n",
       " 'A7': {'v': 277,\n",
       "  'h': 101,\n",
       "  'ff': 41,\n",
       "  'bb': 39,\n",
       "  'z': 7,\n",
       "  'dd': 5,\n",
       "  'j': 5,\n",
       "  'Missing': 4,\n",
       "  'n': 3,\n",
       "  'o': 1},\n",
       " 'A9': {'t': 256, 'f': 227},\n",
       " 'A10': {'f': 271, 't': 212},\n",
       " 'A12': {'f': 263, 't': 220},\n",
       " 'A13': {'g': 441, 's': 38, 'p': 4}}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the encoder_dict_ attribute contains the counts\n",
    "# per category per variable.\n",
    "\n",
    "count_enc.encoder_dict_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's transform train and test sets\n",
    "\n",
    "X_train_enc = count_enc.transform(X_train)\n",
    "X_test_enc = count_enc.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>144</td>\n",
       "      <td>46.08</td>\n",
       "      <td>3.000</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>93</td>\n",
       "      <td>277</td>\n",
       "      <td>2.375</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>8</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>396.0</td>\n",
       "      <td>4159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303</th>\n",
       "      <td>144</td>\n",
       "      <td>15.92</td>\n",
       "      <td>2.875</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>56</td>\n",
       "      <td>277</td>\n",
       "      <td>0.085</td>\n",
       "      <td>227</td>\n",
       "      <td>271</td>\n",
       "      <td>0</td>\n",
       "      <td>263</td>\n",
       "      <td>441</td>\n",
       "      <td>120.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>335</td>\n",
       "      <td>36.33</td>\n",
       "      <td>2.125</td>\n",
       "      <td>115</td>\n",
       "      <td>115</td>\n",
       "      <td>48</td>\n",
       "      <td>277</td>\n",
       "      <td>0.085</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>1</td>\n",
       "      <td>263</td>\n",
       "      <td>441</td>\n",
       "      <td>50.0</td>\n",
       "      <td>1187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>351</th>\n",
       "      <td>335</td>\n",
       "      <td>22.17</td>\n",
       "      <td>0.585</td>\n",
       "      <td>115</td>\n",
       "      <td>115</td>\n",
       "      <td>38</td>\n",
       "      <td>41</td>\n",
       "      <td>0.000</td>\n",
       "      <td>227</td>\n",
       "      <td>271</td>\n",
       "      <td>0</td>\n",
       "      <td>263</td>\n",
       "      <td>441</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118</th>\n",
       "      <td>335</td>\n",
       "      <td>57.83</td>\n",
       "      <td>7.040</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>26</td>\n",
       "      <td>277</td>\n",
       "      <td>14.000</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>6</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>360.0</td>\n",
       "      <td>1332</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A1     A2     A3   A4   A5  A6   A7      A8   A9  A10  A11  A12  A13  \\\n",
       "596  144  46.08  3.000  363  363  93  277   2.375  256  212    8  220  441   \n",
       "303  144  15.92  2.875  363  363  56  277   0.085  227  271    0  263  441   \n",
       "204  335  36.33  2.125  115  115  48  277   0.085  256  212    1  263  441   \n",
       "351  335  22.17  0.585  115  115  38   41   0.000  227  271    0  263  441   \n",
       "118  335  57.83  7.040  363  363  26  277  14.000  256  212    6  220  441   \n",
       "\n",
       "       A14   A15  \n",
       "596  396.0  4159  \n",
       "303  120.0     0  \n",
       "204   50.0  1187  \n",
       "351  100.0     0  \n",
       "118  360.0  1332  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's inspect the result\n",
    "\n",
    "X_train_enc.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>144</td>\n",
       "      <td>45.83</td>\n",
       "      <td>10.50</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>56</td>\n",
       "      <td>277</td>\n",
       "      <td>5.000</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>7</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>586</th>\n",
       "      <td>335</td>\n",
       "      <td>64.08</td>\n",
       "      <td>20.00</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>24</td>\n",
       "      <td>101</td>\n",
       "      <td>17.500</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>9</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>144</td>\n",
       "      <td>31.25</td>\n",
       "      <td>3.75</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>30</td>\n",
       "      <td>101</td>\n",
       "      <td>0.625</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>9</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>181.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>492</th>\n",
       "      <td>335</td>\n",
       "      <td>39.25</td>\n",
       "      <td>9.50</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>26</td>\n",
       "      <td>277</td>\n",
       "      <td>6.500</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>14</td>\n",
       "      <td>263</td>\n",
       "      <td>441</td>\n",
       "      <td>240.0</td>\n",
       "      <td>4607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350</th>\n",
       "      <td>144</td>\n",
       "      <td>26.17</td>\n",
       "      <td>2.00</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>0.000</td>\n",
       "      <td>227</td>\n",
       "      <td>271</td>\n",
       "      <td>0</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>276.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A1     A2     A3   A4   A5  A6   A7      A8   A9  A10  A11  A12  A13  \\\n",
       "14   144  45.83  10.50  363  363  56  277   5.000  256  212    7  220  441   \n",
       "586  335  64.08  20.00  363  363  24  101  17.500  256  212    9  220  441   \n",
       "140  144  31.25   3.75  363  363  30  101   0.625  256  212    9  220  441   \n",
       "492  335  39.25   9.50  363  363  26  277   6.500  256  212   14  263  441   \n",
       "350  144  26.17   2.00  363  363   8    5   0.000  227  271    0  220  441   \n",
       "\n",
       "       A14   A15  \n",
       "14     0.0     0  \n",
       "586    0.0  1000  \n",
       "140  181.0     0  \n",
       "492  240.0  4607  \n",
       "350  276.0     1  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test_enc.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Count encoding with Category Encoders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from category_encoders.count import CountEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's set up the encoder to encode all\n",
    "# categorical variables with counts\n",
    "\n",
    "count_enc = CountEncoder(cols=None)\n",
    "\n",
    "# for frequency encoding ==> set parameter normalize=True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-2 {\n",
       "  /* Definition of color scheme common for light and dark mode */\n",
       "  --sklearn-color-text: black;\n",
       "  --sklearn-color-line: gray;\n",
       "  /* Definition of color scheme for unfitted estimators */\n",
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
       "  /* Definition of color scheme for fitted estimators */\n",
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
       "\n",
       "  /* Specific color for light theme */\n",
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
       "  --sklearn-color-icon: #696969;\n",
       "\n",
       "  @media (prefers-color-scheme: dark) {\n",
       "    /* Redefinition of color scheme for dark theme */\n",
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
       "    --sklearn-color-icon: #878787;\n",
       "  }\n",
       "}\n",
       "\n",
       "#sk-container-id-2 {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 pre {\n",
       "  padding: 0;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-hidden--visually {\n",
       "  border: 0;\n",
       "  clip: rect(1px 1px 1px 1px);\n",
       "  clip: rect(1px, 1px, 1px, 1px);\n",
       "  height: 1px;\n",
       "  margin: -1px;\n",
       "  overflow: hidden;\n",
       "  padding: 0;\n",
       "  position: absolute;\n",
       "  width: 1px;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-dashed-wrapped {\n",
       "  border: 1px dashed var(--sklearn-color-line);\n",
       "  margin: 0 0.4em 0.5em 0.4em;\n",
       "  box-sizing: border-box;\n",
       "  padding-bottom: 0.4em;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-container {\n",
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
       "     so we also need the `!important` here to be able to override the\n",
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
       "  display: inline-block !important;\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-text-repr-fallback {\n",
       "  display: none;\n",
       "}\n",
       "\n",
       "div.sk-parallel-item,\n",
       "div.sk-serial,\n",
       "div.sk-item {\n",
       "  /* draw centered vertical line to link estimators */\n",
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
       "  background-size: 2px 100%;\n",
       "  background-repeat: no-repeat;\n",
       "  background-position: center center;\n",
       "}\n",
       "\n",
       "/* Parallel-specific style estimator block */\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item::after {\n",
       "  content: \"\";\n",
       "  width: 100%;\n",
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
       "  flex-grow: 1;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel {\n",
       "  display: flex;\n",
       "  align-items: stretch;\n",
       "  justify-content: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  position: relative;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n",
       "  align-self: flex-end;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n",
       "  align-self: flex-start;\n",
       "  width: 50%;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n",
       "  width: 0;\n",
       "}\n",
       "\n",
       "/* Serial-specific style estimator block */\n",
       "\n",
       "#sk-container-id-2 div.sk-serial {\n",
       "  display: flex;\n",
       "  flex-direction: column;\n",
       "  align-items: center;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  padding-right: 1em;\n",
       "  padding-left: 1em;\n",
       "}\n",
       "\n",
       "\n",
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
       "clickable and can be expanded/collapsed.\n",
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
       "*/\n",
       "\n",
       "/* Pipeline and ColumnTransformer style (default) */\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable {\n",
       "  /* Default theme specific background. It is overwritten whether we have a\n",
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
       "  background-color: var(--sklearn-color-background);\n",
       "}\n",
       "\n",
       "/* Toggleable label */\n",
       "#sk-container-id-2 label.sk-toggleable__label {\n",
       "  cursor: pointer;\n",
       "  display: block;\n",
       "  width: 100%;\n",
       "  margin-bottom: 0;\n",
       "  padding: 0.5em;\n",
       "  box-sizing: border-box;\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n",
       "  /* Arrow on the left of the label */\n",
       "  content: \"▸\";\n",
       "  float: left;\n",
       "  margin-right: 0.25em;\n",
       "  color: var(--sklearn-color-icon);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n",
       "  color: var(--sklearn-color-text);\n",
       "}\n",
       "\n",
       "/* Toggleable content - dropdown */\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content {\n",
       "  max-height: 0;\n",
       "  max-width: 0;\n",
       "  overflow: hidden;\n",
       "  text-align: left;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content pre {\n",
       "  margin: 0.2em;\n",
       "  border-radius: 0.25em;\n",
       "  color: var(--sklearn-color-text);\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
       "  /* Expand drop-down */\n",
       "  max-height: 200px;\n",
       "  max-width: 100%;\n",
       "  overflow: auto;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
       "  content: \"▾\";\n",
       "}\n",
       "\n",
       "/* Pipeline/ColumnTransformer-specific style */\n",
       "\n",
       "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator-specific style */\n",
       "\n",
       "/* Colorize estimator box */\n",
       "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n",
       "#sk-container-id-2 div.sk-label label {\n",
       "  /* The background is the default theme color */\n",
       "  color: var(--sklearn-color-text-on-default-background);\n",
       "}\n",
       "\n",
       "/* On hover, darken the color of the background */\n",
       "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "/* Label box, darken color on hover, fitted */\n",
       "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
       "  color: var(--sklearn-color-text);\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Estimator label */\n",
       "\n",
       "#sk-container-id-2 div.sk-label label {\n",
       "  font-family: monospace;\n",
       "  font-weight: bold;\n",
       "  display: inline-block;\n",
       "  line-height: 1.2em;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-label-container {\n",
       "  text-align: center;\n",
       "}\n",
       "\n",
       "/* Estimator-specific */\n",
       "#sk-container-id-2 div.sk-estimator {\n",
       "  font-family: monospace;\n",
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
       "  border-radius: 0.25em;\n",
       "  box-sizing: border-box;\n",
       "  margin-bottom: 0.5em;\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
       "}\n",
       "\n",
       "/* on hover */\n",
       "#sk-container-id-2 div.sk-estimator:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
       "}\n",
       "\n",
       "#sk-container-id-2 div.sk-estimator.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
       "}\n",
       "\n",
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
       "\n",
       "/* Common style for \"i\" and \"?\" */\n",
       "\n",
       ".sk-estimator-doc-link,\n",
       "a:link.sk-estimator-doc-link,\n",
       "a:visited.sk-estimator-doc-link {\n",
       "  float: right;\n",
       "  font-size: smaller;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1em;\n",
       "  height: 1em;\n",
       "  width: 1em;\n",
       "  text-decoration: none !important;\n",
       "  margin-left: 1ex;\n",
       "  /* unfitted */\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted,\n",
       "a:link.sk-estimator-doc-link.fitted,\n",
       "a:visited.sk-estimator-doc-link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
       ".sk-estimator-doc-link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover,\n",
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
       ".sk-estimator-doc-link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "/* Span, style for the box shown on hovering the info icon */\n",
       ".sk-estimator-doc-link span {\n",
       "  display: none;\n",
       "  z-index: 9999;\n",
       "  position: relative;\n",
       "  font-weight: normal;\n",
       "  right: .2ex;\n",
       "  padding: .5ex;\n",
       "  margin: .5ex;\n",
       "  width: min-content;\n",
       "  min-width: 20ex;\n",
       "  max-width: 50ex;\n",
       "  color: var(--sklearn-color-text);\n",
       "  box-shadow: 2pt 2pt 4pt #999;\n",
       "  /* unfitted */\n",
       "  background: var(--sklearn-color-unfitted-level-0);\n",
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link.fitted span {\n",
       "  /* fitted */\n",
       "  background: var(--sklearn-color-fitted-level-0);\n",
       "  border: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "\n",
       ".sk-estimator-doc-link:hover span {\n",
       "  display: block;\n",
       "}\n",
       "\n",
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link {\n",
       "  float: right;\n",
       "  font-size: 1rem;\n",
       "  line-height: 1em;\n",
       "  font-family: monospace;\n",
       "  background-color: var(--sklearn-color-background);\n",
       "  border-radius: 1rem;\n",
       "  height: 1rem;\n",
       "  width: 1rem;\n",
       "  text-decoration: none;\n",
       "  /* unfitted */\n",
       "  color: var(--sklearn-color-unfitted-level-1);\n",
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link.fitted {\n",
       "  /* fitted */\n",
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
       "  color: var(--sklearn-color-fitted-level-1);\n",
       "}\n",
       "\n",
       "/* On hover */\n",
       "#sk-container-id-2 a.estimator_doc_link:hover {\n",
       "  /* unfitted */\n",
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
       "  color: var(--sklearn-color-background);\n",
       "  text-decoration: none;\n",
       "}\n",
       "\n",
       "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n",
       "  /* fitted */\n",
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
       "}\n",
       "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>CountEncoder(cols=[&#x27;A1&#x27;, &#x27;A4&#x27;, &#x27;A5&#x27;, &#x27;A6&#x27;, &#x27;A7&#x27;, &#x27;A9&#x27;, &#x27;A10&#x27;, &#x27;A12&#x27;, &#x27;A13&#x27;],\n",
       "             combine_min_nan_groups=True)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;CountEncoder<span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>CountEncoder(cols=[&#x27;A1&#x27;, &#x27;A4&#x27;, &#x27;A5&#x27;, &#x27;A6&#x27;, &#x27;A7&#x27;, &#x27;A9&#x27;, &#x27;A10&#x27;, &#x27;A12&#x27;, &#x27;A13&#x27;],\n",
       "             combine_min_nan_groups=True)</pre></div> </div></div></div></div>"
      ],
      "text/plain": [
       "CountEncoder(cols=['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13'],\n",
       "             combine_min_nan_groups=True)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fit the encoder to the train set\n",
    "\n",
    "count_enc.fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['A1', 'A4', 'A5', 'A6', 'A7', 'A9', 'A10', 'A12', 'A13']"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the encoder finds categorical variables automatically\n",
    "\n",
    "count_enc.cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A1': A1\n",
       " b          335\n",
       " a          144\n",
       " Missing      4\n",
       " Name: count, dtype: int64,\n",
       " 'A4': A4\n",
       " u          363\n",
       " y          115\n",
       " Missing      4\n",
       " l            1\n",
       " Name: count, dtype: int64,\n",
       " 'A5': A5\n",
       " g          363\n",
       " p          115\n",
       " Missing      4\n",
       " gg           1\n",
       " Name: count, dtype: int64,\n",
       " 'A6': A6\n",
       " c          93\n",
       " q          56\n",
       " w          48\n",
       " i          41\n",
       " ff         38\n",
       " k          38\n",
       " aa         34\n",
       " cc         30\n",
       " m          26\n",
       " x          24\n",
       " e          21\n",
       " d          21\n",
       " j           8\n",
       " Missing     4\n",
       " r           1\n",
       " Name: count, dtype: int64,\n",
       " 'A7': A7\n",
       " v          277\n",
       " h          101\n",
       " ff          41\n",
       " bb          39\n",
       " z            7\n",
       " dd           5\n",
       " j            5\n",
       " Missing      4\n",
       " n            3\n",
       " o            1\n",
       " Name: count, dtype: int64,\n",
       " 'A9': A9\n",
       " t    256\n",
       " f    227\n",
       " Name: count, dtype: int64,\n",
       " 'A10': A10\n",
       " f    271\n",
       " t    212\n",
       " Name: count, dtype: int64,\n",
       " 'A12': A12\n",
       " f    263\n",
       " t    220\n",
       " Name: count, dtype: int64,\n",
       " 'A13': A13\n",
       " g    441\n",
       " s     38\n",
       " p      4\n",
       " Name: count, dtype: int64}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the enoding parameters are stored in the\n",
    "# attribute mappings:\n",
    "\n",
    "count_enc.mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's transform train and test sets\n",
    "\n",
    "X_train_enc = count_enc.transform(X_train)\n",
    "X_test_enc = count_enc.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>144</td>\n",
       "      <td>46.08</td>\n",
       "      <td>3.000</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>93</td>\n",
       "      <td>277</td>\n",
       "      <td>2.375</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>8</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>396.0</td>\n",
       "      <td>4159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303</th>\n",
       "      <td>144</td>\n",
       "      <td>15.92</td>\n",
       "      <td>2.875</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>56</td>\n",
       "      <td>277</td>\n",
       "      <td>0.085</td>\n",
       "      <td>227</td>\n",
       "      <td>271</td>\n",
       "      <td>0</td>\n",
       "      <td>263</td>\n",
       "      <td>441</td>\n",
       "      <td>120.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>335</td>\n",
       "      <td>36.33</td>\n",
       "      <td>2.125</td>\n",
       "      <td>115</td>\n",
       "      <td>115</td>\n",
       "      <td>48</td>\n",
       "      <td>277</td>\n",
       "      <td>0.085</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>1</td>\n",
       "      <td>263</td>\n",
       "      <td>441</td>\n",
       "      <td>50.0</td>\n",
       "      <td>1187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>351</th>\n",
       "      <td>335</td>\n",
       "      <td>22.17</td>\n",
       "      <td>0.585</td>\n",
       "      <td>115</td>\n",
       "      <td>115</td>\n",
       "      <td>38</td>\n",
       "      <td>41</td>\n",
       "      <td>0.000</td>\n",
       "      <td>227</td>\n",
       "      <td>271</td>\n",
       "      <td>0</td>\n",
       "      <td>263</td>\n",
       "      <td>441</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>118</th>\n",
       "      <td>335</td>\n",
       "      <td>57.83</td>\n",
       "      <td>7.040</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>26</td>\n",
       "      <td>277</td>\n",
       "      <td>14.000</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>6</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>360.0</td>\n",
       "      <td>1332</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A1     A2     A3   A4   A5  A6   A7      A8   A9  A10  A11  A12  A13  \\\n",
       "596  144  46.08  3.000  363  363  93  277   2.375  256  212    8  220  441   \n",
       "303  144  15.92  2.875  363  363  56  277   0.085  227  271    0  263  441   \n",
       "204  335  36.33  2.125  115  115  48  277   0.085  256  212    1  263  441   \n",
       "351  335  22.17  0.585  115  115  38   41   0.000  227  271    0  263  441   \n",
       "118  335  57.83  7.040  363  363  26  277  14.000  256  212    6  220  441   \n",
       "\n",
       "       A14   A15  \n",
       "596  396.0  4159  \n",
       "303  120.0     0  \n",
       "204   50.0  1187  \n",
       "351  100.0     0  \n",
       "118  360.0  1332  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# let's inspect the result\n",
    "\n",
    "X_train_enc.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1</th>\n",
       "      <th>A2</th>\n",
       "      <th>A3</th>\n",
       "      <th>A4</th>\n",
       "      <th>A5</th>\n",
       "      <th>A6</th>\n",
       "      <th>A7</th>\n",
       "      <th>A8</th>\n",
       "      <th>A9</th>\n",
       "      <th>A10</th>\n",
       "      <th>A11</th>\n",
       "      <th>A12</th>\n",
       "      <th>A13</th>\n",
       "      <th>A14</th>\n",
       "      <th>A15</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>144</td>\n",
       "      <td>45.83</td>\n",
       "      <td>10.50</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>56</td>\n",
       "      <td>277</td>\n",
       "      <td>5.000</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>7</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>586</th>\n",
       "      <td>335</td>\n",
       "      <td>64.08</td>\n",
       "      <td>20.00</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>24</td>\n",
       "      <td>101</td>\n",
       "      <td>17.500</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>9</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>144</td>\n",
       "      <td>31.25</td>\n",
       "      <td>3.75</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>30</td>\n",
       "      <td>101</td>\n",
       "      <td>0.625</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>9</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>181.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>492</th>\n",
       "      <td>335</td>\n",
       "      <td>39.25</td>\n",
       "      <td>9.50</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>26</td>\n",
       "      <td>277</td>\n",
       "      <td>6.500</td>\n",
       "      <td>256</td>\n",
       "      <td>212</td>\n",
       "      <td>14</td>\n",
       "      <td>263</td>\n",
       "      <td>441</td>\n",
       "      <td>240.0</td>\n",
       "      <td>4607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>350</th>\n",
       "      <td>144</td>\n",
       "      <td>26.17</td>\n",
       "      <td>2.00</td>\n",
       "      <td>363</td>\n",
       "      <td>363</td>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>0.000</td>\n",
       "      <td>227</td>\n",
       "      <td>271</td>\n",
       "      <td>0</td>\n",
       "      <td>220</td>\n",
       "      <td>441</td>\n",
       "      <td>276.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A1     A2     A3   A4   A5  A6   A7      A8   A9  A10  A11  A12  A13  \\\n",
       "14   144  45.83  10.50  363  363  56  277   5.000  256  212    7  220  441   \n",
       "586  335  64.08  20.00  363  363  24  101  17.500  256  212    9  220  441   \n",
       "140  144  31.25   3.75  363  363  30  101   0.625  256  212    9  220  441   \n",
       "492  335  39.25   9.50  363  363  26  277   6.500  256  212   14  263  441   \n",
       "350  144  26.17   2.00  363  363   8    5   0.000  227  271    0  220  441   \n",
       "\n",
       "       A14   A15  \n",
       "14     0.0     0  \n",
       "586    0.0  1000  \n",
       "140  181.0     0  \n",
       "492  240.0  4607  \n",
       "350  276.0     1  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test_enc.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fsml",
   "language": "python",
   "name": "fsml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "921.556px",
    "left": "0px",
    "right": "1827px",
    "top": "110.444px",
    "width": "173.333px"
   },
   "toc_section_display": "block",
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
